#====================
# Used libraries


import pandas as pd
import numpy as np


#classifiers
from sklearn import tree
from sklearn import neighbors
from sklearn.linear_model import LogisticRegression


from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix


import pickle
from itertools import combinations
#===========================================================
#===========================================================

# Read the spreadsheet and convert it to a plain NumPy array.
path1 = 'Planilla_pruebas.xlsx'
Data_df = pd.read_excel(path1)
all_rows = Data_df.to_numpy()

# Keep three row ranges of the array: 3-21, 23-29 and 31 to the end.
# Array rows 0-2, 22 and 30 are therefore left out.
# NOTE(review): the reason these rows are skipped is not visible here —
# confirm against the spreadsheet layout before changing the ranges.
row_blocks = (slice(3, 22), slice(23, 30), slice(31, None))
XX, XY, XZ = (all_rows[rows, :] for rows in row_blocks)

# Stack the kept ranges back into a single table (row-wise).
Data = np.concatenate([XX, XY, XZ], axis=0)

# Input variables: columns 1 to 5.
X = Data[:, 1:6]
# Output variable: column 6.
y = Data[:, 6]

#===========================================================
#===========================================================

# Enumerate every subset of the 5 input-variable column indices, ordered by
# subset size and then lexicographically.
# The list has 32 entries: index 0 is the empty subset, indices 1..31 are the
# 31 non-empty combinations, so "combination k" below is simply combinat[k].

input_var = [0, 1, 2, 3, 4]

combinat = [
    list(subset)
    for size in range(len(input_var) + 1)
    for subset in combinations(input_var, size)
]

# Select one combination between 1 and 31 (index 0 would select no variable):
#   combination 1 is [0]    -> educational level
#   combination 3 is [2]    -> TMT
#   combination 7 is [0, 2] -> educational level, TMT

combination_assessed = combinat[7]

print(combination_assessed)

# Restrict the classifier inputs to the columns of the chosen combination.
X = X[:, combination_assessed]

# ============ Experimental settings for model assessment ============

# Number of random train/test repetitions (the resampling iterations).
num_exp = 100

# Per-iteration results: the fitted model and its test confusion matrix.
models, confusion_matrices_test = [], []

# #===========================================================

# #===========================================================
# Code the MiniBESTest score as a binary variable:
#   0.0 when the score is below the threshold, 1.0 otherwise.

thresh = 22

# Element-wise comparison over the whole target vector; the float literals keep
# the result a float64 array with one entry per sample.
y_codif = np.where(y < thresh, 0.0, 1.0)

# #===========================================================

# Repeat num_exp times: draw a random train/test split, fit a fresh classifier
# and record both the fitted model and its confusion matrix on the test part.
for i in range(num_exp):

    # ===========================================================
    # Split the data into training (70%) and assessment (30%) parts.
    # No random_state is passed, so the split differs on every iteration and
    # on every run of the script.
    X_train, X_test, y_train, y_test = train_test_split(X, y_codif, test_size=0.3)
    # ===========================================================

    # -----------------------Classifiers----------------------
    # Exactly one "Clasif = ..." line should be active; comment/uncomment the
    # lines below to assess a different classifier.
    # ===========================================================

    # -----------------------LogisticRegression----------------------
    Clasif = LogisticRegression(C=0.1, class_weight="balanced")

    # #-----------------------Trees----------------------
    # Clasif = tree.DecisionTreeClassifier(class_weight="balanced", min_samples_split=15)

    # #-----------------------KNN----------------------
    # n_neighbors = 1 #1,2,3,4,5

    # weights="uniform"
    # # weights="distance"

    # Clasif = clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)

    # Training / assessment stage: fit on the training part, predict the test part.
    y_pred = Clasif.fit(X_train, y_train).predict(X_test)

    # ===========================================================

    # Confusion matrix for this iteration (rows: true class, columns: predicted).
    # labels=[0, 1] pins the matrix to 2x2 even when a split's test labels and
    # predictions contain a single class; without it such a split yields a 1x1
    # matrix that would be silently broadcast when the matrices are summed later.
    conf_mat_test = confusion_matrix(y_test, y_pred, labels=[0, 1])

    # ===========================================================

    # Save the trained model and the confusion matrix of this iteration.
    models.append(Clasif)
    confusion_matrices_test.append(conf_mat_test)
       
    
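# #============ Saving results to files =======================
# NOTE: confusion_matrices_train is never built by this script; define it (or
# drop that dump) before uncommenting. The file names written here also differ
# from the ones read back in the loading section below.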
# with open("models02_v3.pckl", "wb") as f:
#     for model in models:
#           pickle.dump(model, f)
# with open("matrices_conf_train02_v3.pckl", "wb") as f:
#     pickle.dump(confusion_matrices_train, f) 
# with open("matrices_conf_test02_v3.pckl", "wb") as f:
#     pickle.dump(confusion_matrices_test, f)                  
# #===========================================================         
# #============ loading data from files ============================
# models = []
# with open("models.pckl", "rb") as f:
#     while True:
#         try:
#             models.append(pickle.load(f))
#         except EOFError:
#             break

# with open("matrices_conf_train.pckl", "rb") as f:
#     confusion_matrices_train=pickle.load(f)
# with open("matrices_conf_test.pckl", "rb") as f:
#     confusion_matrices_test=pickle.load(f)         

# #============ Calculating aggregated metrics =======================
#===========================================================

# Element-wise total of the first num_exp test confusion matrices. The float
# zero matrix is the start value, so the total is a float 2x2 array.
MA_test = sum(
    (confusion_matrices_test[k] for k in range(num_exp)),
    np.zeros((2, 2)),
)

# Cells of the pooled matrix: first index = true class, second = predicted.
# Class 0 is the "enf" group and class 1 the "sano" group in the names below.
(n00, n01), (n10, n11) = MA_test

# Overall accuracy: diagonal over the grand total.
mean_acuracy = (n00 + n11) / MA_test.sum()

# Per-class recall (row-wise) and precision (column-wise).
mean_recall_enf = n00 / (n00 + n01)
mean_recall_sano = n11 / (n10 + n11)
mean_PREC_enf = n00 / (n00 + n10)
mean_PREC_sano = n11 / (n01 + n11)

# TPR / TNR as reported here treat class 0 as the positive class, so they
# coincide with the two recalls.
mean_TPR = n00 / (n00 + n01)
mean_TNR = n11 / (n10 + n11)

# Mean of the two per-class recalls.
class_mean_acuracy = 0.5 * (mean_recall_enf + mean_recall_sano)

# Report: pooled matrix first, then each metric with three decimals.
print('\n MC Test:')
print(MA_test)
for template, value in (
    ('\n Accuracy: %.3f', mean_acuracy),
    ('\n Accuracy x class: %.3f', class_mean_acuracy),
    ('\n TPR: %.3f', mean_TPR),
    ('\n TNR: %.3f', mean_TNR),
    ('\n Precision enf: %.3f', mean_PREC_enf),
    ('\n Precision sano: %.3f', mean_PREC_sano),
):
    print(template % value)
 


